Table calculus
`dplyr`
`dbplyr`
`dtplyr`
`data.table`
`arrow`
Published

December 13, 2025

M1 MIDS/MFA/LOGOS

Université Paris Cité

Année 2024

Course Homepage

Moodle

https://dbplyr.tidyverse.org

Code
library(dplyr, warn.conflicts = FALSE)
library(dbplyr, warn.conflicts = FALSE)
library(DBI)
library(RPostgreSQL)
Code
con <- DBI::dbConnect(
    RPostgreSQL::PostgreSQL(),
    user=Sys.getenv('USERNAME'),
    password=rstudioapi::askForPassword(),
    dbname="bd_2023-24",
    host="localhost",
    port=5436
)

With DBI::...

Code
dbGetQuery(con, 
    "SELECT 
        count(*) 
    FROM 
        nycflights.flights ;"
)

https://dbplyr.tidyverse.org/index.html

Code
res <- dbSendQuery(con, 
    "SELECT 
        origin, 
        count(*) AS n  
    FROM 
        nycflights.flights
    GROUP BY 
        origin"
)

df <- dbFetch(res)

dbClearResult(res)

head(df)

With dbplyr::...

Code
Code
tbl(con, 
    sql(
        "SELECT 
            origin, 
            count(*) AS n  
        FROM 
            nycflights.flights
        GROUP BY 
            origin")
)
Code
pg_flights <- tbl(con,I("nycflights.flights"))
pg_airlines <- tbl(con,I("nycflights.airlines"))
Code
pg_flights |> 
    count(by=origin)
Code
pg_flights |> 
    group_by(origin, dest) |> 
    summarise(mean_arr_delay= mean(arr_delay, na.rm=T))
Code
pg_flights |> 
    group_by(origin, dest) |> 
    summarise(across(ends_with("delay"), \(x) mean(x, na.rm=TRUE)))
Code
pg_airlines |> 
    glimpse()
Code
pg_flights |> 
    inner_join(pg_airlines, by=c("carrier")) |> 
    group_by(origin, carrier) |> 
    summarise(N=n(), across(ends_with("delay"), .fns=\(x) mean(x, na.rm=TRUE), .names="Avg {.col}"))
Code

Enjoy!